Code
library(tidyverse)
library(patchwork)
library(khroma)In R/Analyses_for_paper/01b_medians_and_their_distns.qmd, we went a step further than calculating long-term medians. For each parameter, we used the compiled monthly medians (all months) to generate quantiles describing the monthly medians. So now, not only do we have a single long-term median for each parameter at each station, we also have min/max (WQ only); 5th, 10th, 25th, 50th, 75th, 90th, and 95th percentiles. For nutrients, robust regression on order statistics (ROS) was used to calculate these, and min/max were excluded but the others were generated by default. For WQ, the normal quantile R function was used to generate all of the listed quantiles.
In this file, those will be joined with descriptive characteristics - bioregion from SWMP clue, for ordering; and clusters from Carl and Dave’s work - and I’ll generate a figure for each parameter across all stations. In the first part, the parameters will be in the same order in every figure (bioregion – reserve – station), for ease of finding “your own” stations. In the second part, medians will ordered by magnitude and stations will not be labelled, for potential use in papers and/or supplementary information.
library(tidyverse)
library(patchwork)
library(khroma)medians_in <- here::here("Outputs",
"01_calculated_medians",
"WQ-NUT_overallMediansAndQuantiles.csv")
trends_in <- here::here("Outputs",
"02_calculated_long-term-trends",
"long-term-trends.csv")
trends_nut_in <- here::here("Outputs",
"02_calculated_long-term-trends",
"NUT_trends_back-transformed.csv")
# to get SWmP CLUE params, clusters, and other info
other_in <- here::here("Outputs",
"04_compiled_predictors",
"compiled_predictors_withExternalInfo.csv")meds <- read.csv(medians_in)
trnds <- read.csv(trends_in)
trnds_nut <- read.csv(trends_nut_in)
other <- read.csv(other_in)# this is the data frame that's been reduced to only stations included for final analysis, so will want to left join on this
other_trimmed <- other |>
select(-all_of(ends_with("_trend")),
-all_of(ends_with("_median"))) |>
mutate(Ecoregion = case_when(Ecoregion == "Oregon, Washington, Vancouver Coast and Shelf" ~ "Oregon, Washington, Vancouver",
.default = Ecoregion))
key_stns <- other |>
select(reserve, station)trnds_wq <- trnds |>
filter(parameter %in% c("do_mgl_median",
"do_pct_median",
"sal_median",
"spcond_median",
"temp_median",
"turb_median")) |>
mutate(parameter = str_remove(parameter, "_median")) |>
select(station,
param = parameter,
trend = Slope,
ciLow_trend = conf.low,
ciHigh_trend = conf.high,
p_trend = p.value,
sig_seasonality)
trnds_wqnut <- bind_rows(trnds_wq, trnds_nut) |>
mutate(station = substr(station, 1, 5),
param = case_when(param == "chla_n" ~ "chla",
.default = param))
trnds_and_meds <- left_join(trnds_wqnut,
meds,
by = c("station", "param"))
all <- left_join(key_stns,
trnds_and_meds,
by = "station") |>
left_join(other_trimmed,
by = c("reserve", "station"))stn_factor <- other_trimmed |>
select(Ecoregion, reserve, station) |>
arrange(Ecoregion, reserve, station) |>
mutate(stn_factor = fct_inorder(station)) |>
select(station, stn_factor)
all <- left_join(all, stn_factor, by = "station")# manual shapes
shps <- c(18, 1, 2, 4, 10, 8, 9, 18, 1, 2, 4, 10, 8, 9)
# manual colors - don't vary with the same repetition as shapes,
# so every ecoregion will have a unique shape*color combination
# even though shapes and colors are not themselves unique
# pal <- c(color("muted")(9), color("muted")(4))
pal <- as.vector(color("muted")(9))
pal <- c(pal, pal)Bars around points indicate:
Vertical lines on plots indicate:
If you only look at one set of graphs, look at the first batch - organized within Ecoregion, colored by Ecoregion. I think these are the most appealing, but created other versions because I thought people would ask what they’d look like.
Stations are ordered by Ecoregion, followed by Reserve, then Station.
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")
for(i in seq_along(wq_parms)){
parm = wq_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = trend,
xmin = ciLow_trend,
xmax = ciHigh_trend)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (measurement units/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
legend.title.position = 'top',
legend.margin = margin(0, 0, 0, 0),
legend.text = element_text(size = rel(0.7)),
legend.key.spacing.y = unit(0.2, "pt"),
axis.text.y = element_text(size = rel(0.9))) &
guides(col = guide_legend(byrow = TRUE,
ncol = 3))
print(p_out)
}nut_parms <- c("chla", "nh4f", "no23f", "po4f")
for(i in seq_along(nut_parms)){
parm = nut_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_x_log10() +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = trend_pctPerYear,
xmin = ciLow_pctPerYear,
xmax = ciHigh_pctPerYear)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (% change/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
legend.title.position = 'top',
legend.margin = margin(0, 0, 0, 0),
legend.text = element_text(size = rel(0.7)),
legend.key.spacing.y = unit(0.2, "pt"),
axis.text.y = element_text(size = rel(0.9))) &
guides(col = guide_legend(byrow = TRUE,
ncol = 3))
print(p_out)
}wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")
for(i in seq_along(wq_parms)){
parm = wq_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = trend,
xmin = ciLow_trend,
xmax = ciHigh_trend)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (measurement units/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
axis.text.y = element_text(size = rel(0.9)))
print(p_out)
}nut_parms <- c("chla", "nh4f", "no23f", "po4f")
for(i in seq_along(nut_parms)){
parm = nut_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_x_log10() +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = stn_factor,
x = trend_pctPerYear,
xmin = ciLow_pctPerYear,
xmax = ciHigh_pctPerYear)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (% change/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
axis.text.y = element_text(size = rel(0.9)))
print(p_out)
}wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")
for(i in seq_along(wq_parms)){
parm = wq_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = station,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = station,
x = trend,
xmin = ciLow_trend,
xmax = ciHigh_trend)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (measurement units/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
legend.title.position = 'top',
legend.margin = margin(0, 0, 0, 0),
legend.text = element_text(size = rel(0.7)),
legend.key.spacing.y = unit(0.2, "pt"),
axis.text.y = element_text(size = rel(0.9))) &
guides(col = guide_legend(byrow = TRUE,
ncol = 3))
print(p_out)
}nut_parms <- c("chla", "nh4f", "no23f", "po4f")
for(i in seq_along(nut_parms)){
parm = nut_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = station,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_x_log10() +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = Ecoregion,
shape = Ecoregion)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = station,
x = trend_pctPerYear,
xmin = ciLow_pctPerYear,
xmax = ciHigh_pctPerYear)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (% change/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
legend.title.position = 'top',
legend.margin = margin(0, 0, 0, 0),
legend.text = element_text(size = rel(0.7)),
legend.key.spacing.y = unit(0.2, "pt"),
axis.text.y = element_text(size = rel(0.9))) &
guides(col = guide_legend(byrow = TRUE,
ncol = 3))
print(p_out)
}From Dave and Carl’s clustering
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")
for(i in seq_along(wq_parms)){
parm = wq_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = station,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = station,
x = trend,
xmin = ciLow_trend,
xmax = ciHigh_trend)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (measurement units/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
axis.text.y = element_text(size = rel(0.9)))
print(p_out)
}nut_parms <- c("chla", "nh4f", "no23f", "po4f")
for(i in seq_along(nut_parms)){
parm = nut_parms[i]
sub_dat <- filter(all, param == parm)
p_meds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
col = "gray40") +
geom_pointrange(aes(y = station,
x = p_50,
xmin = p_25,
xmax = p_75)) +
scale_x_log10() +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "monthly median")
p_trnds <- ggplot(sub_dat,
aes(col = cluster,
shape = cluster)) +
geom_vline(xintercept = 0,
col = "gray40") +
geom_pointrange(aes(y = station,
x = trend_pctPerYear,
xmin = ciLow_pctPerYear,
xmax = ciHigh_pctPerYear)) +
scale_color_manual(values = pal) +
scale_shape_manual(values = shps) +
labs(y = NULL,
x = "trend (% change/yr)")
p_out <- p_meds + p_trnds +
plot_annotation(title = parm,
caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
plot_layout(guides = 'collect',
axes = 'collect') &
theme_bw() &
theme(legend.position = 'bottom',
axis.text.y = element_text(size = rel(0.9)))
print(p_out)
}